package org.xqh.study.google.guava;

import com.baomidou.mybatisplus.core.toolkit.IdWorker;
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import org.xqh.test.NumberUtils;
import org.xqh.utils.ProtostuffSerializer;
import org.apache.commons.io.Charsets;
import org.springframework.util.StopWatch;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Bloom filter demo built on Guava's {@link BloomFilter}.
 *
 * <p>The program fills a filter with random unique keys, round-trips it through its
 * serialized byte form, and then probes the restored filter with a mix of keys that
 * were really inserted and freshly generated keys, printing how many known keys were
 * recognised and how often an absent key was wrongly reported as present.
 *
 * <p>Originally written 2019/12/25.
 *
 * @author xuqianghui
 * @version 1.0
 */
public class BloomFilterDemo {

    /** Number of distinct keys inserted into the filter (also its expected-insertions size). */
    private static final int INSERTIONS = 100000;

    /** Total number of membership probes issued against the restored filter. */
    private static final int PROBES = 10000;

    /** Every {@code PROBE_STRIDE}-th probe uses a key that was really inserted; all others are fresh keys. */
    private static final int PROBE_STRIDE = 100;

    /** Number of probes that use a known, inserted key. Must not exceed {@link #INSERTIONS}. */
    private static final int KNOWN_PROBES = PROBES / PROBE_STRIDE;

    /** Number of probes that use a freshly generated key. */
    private static final int ABSENT_PROBES = PROBES - KNOWN_PROBES;

    /**
     * Runs the demo and prints timing plus hit / false-positive statistics to standard output.
     *
     * @param args ignored
     * @throws IOException if the filter cannot be written to or read back from its byte form
     */
    public static void main(String[] args) throws IOException {
        // String-keyed Bloom filter sized for INSERTIONS elements.
        // This create(...) overload uses Guava's default false-positive probability of 0.03.
        BloomFilter<String> bf = BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), INSERTIONS);

        // Ground truth: every key that really was inserted, for exact membership checks.
        Set<String> sets = new HashSet<>(INSERTIONS);

        // The same keys in insertion order, so known keys can be fetched by index.
        List<String> lists = new ArrayList<>(INSERTIONS);

        // Fill all three containers with the same generated keys.
        for (int i = 0; i < INSERTIONS; i++) {
            String uuid = IdWorker.get32UUID();
            bf.put(uuid);
            sets.add(uuid);
            lists.add(uuid);
        }

        // Probe a filter restored from bytes rather than the in-memory original.
        BloomFilter<String> nbf = roundTrip(bf);

        int right = 0; // probes reported present that really are present
        int wrong = 0; // probes reported present that are NOT present (false positives)
        StopWatch sw = new StopWatch(BloomFilterDemo.class.getName());
        sw.start("execute time");
        for (int i = 0; i < PROBES; i++) {
            // i / PROBE_STRIDE walks through KNOWN_PROBES distinct inserted keys
            // (an index of i % PROBE_STRIDE would always be 0 here and re-test one key).
            String uuid = i % PROBE_STRIDE == 0 ? lists.get(i / PROBE_STRIDE) : IdWorker.get32UUID();
            if (nbf.mightContain(uuid)) {
                if (sets.contains(uuid)) {
                    right++;
                } else {
                    wrong++;
                }
            }
        }
        sw.stop();
        System.out.println(sw.prettyPrint());
        // The counts in the messages come from the constants so the report always matches the run.
        System.out.println(INSERTIONS + "数据中 判断" + KNOWN_PROBES + "个已存在数据的个数为==> " + right);
        System.out.println("在" + INSERTIONS + "数据中, 判断" + ABSENT_PROBES + "个实际不存在的元素, 误认为存在的==>"
                + NumberUtils.divide(new BigDecimal(wrong * 100), new BigDecimal(ABSENT_PROBES))
                + "%, 命中率==>"
                + NumberUtils.divide(new BigDecimal((ABSENT_PROBES - wrong) * 100), new BigDecimal(ABSENT_PROBES))
                + "%");
    }

    /**
     * Serializes the filter to bytes and reads it back.
     *
     * <p>{@link BloomFilter#readFrom} only understands the stream produced by
     * {@link BloomFilter#writeTo}, so that pair is used for both directions instead of
     * a general-purpose object serializer.
     *
     * @param filter the filter to copy
     * @return a new filter restored from {@code filter}'s serialized form
     * @throws IOException if writing or reading the byte form fails
     */
    private static BloomFilter<String> roundTrip(BloomFilter<String> filter) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        filter.writeTo(out);
        InputStream in = new ByteArrayInputStream(out.toByteArray());
        return BloomFilter.readFrom(in, Funnels.stringFunnel(StandardCharsets.UTF_8));
    }

}
